Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ stringr 1.5.0
## ✔ tidyr 1.3.0 ✔ forcats 1.0.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
##
## The following object is masked from 'package:dplyr':
##
## select
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(sp)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(elevatr)
library(ggspatial)
library(ggmap) #citation("ggmap")
## ℹ Google's Terms of Service: <]8;;https://mapsplatform.google.comhttps://mapsplatform.google.com]8;;>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(leaflet)
library(htmlwidgets)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(maps)
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggsn)
## Loading required package: grid
##
## Attaching package: 'ggsn'
##
## The following object is masked from 'package:raster':
##
## scalebar
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(performance)
library(ggResidpanel)
library(ape)
## Registered S3 method overwritten by 'ape':
## method from
## plot.mst spdep
##
## Attaching package: 'ape'
##
## The following objects are masked from 'package:raster':
##
## rotate, zoom
##
## The following object is masked from 'package:dplyr':
##
## where
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggpubr)
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:ape':
##
## rotate
##
## The following object is masked from 'package:raster':
##
## rotate
Data
# Colors to Use
ryg.palette<-c("#DB4325", "#EDA247", "#E6E1BC","#57C4AD", "#006164")
ryg.palette.long<-c("#DB4325", "#EDA247", "#E6E1BC","#57C4AD", "#006164", "grey")
ryg.palette.short<-c("#EDA247", "#E6E1BC","#57C4AD", "#006164")
# API Key for Geocoding:
api_key = "AIzaSyAeoVaM_45SQ4G-lHeci1RhRNbhpxO3BDc"
register_google(key = api_key)
waveone<-read.csv("/Volumes/research/Marshall Fire Health/marshall_w1_deID.csv")
wavetwo<-read.csv("/Volumes/research/Marshall Fire Health/marshall_w2_deID.csv")
wave.one<-waveone %>%
mutate(mailingcity=recode_factor(mailingcity, "Superior"="SUPERIOR", "UNINCORPORATED"="BOULDER")) %>%
mutate(across(air_quality_1:air_quality_4, as.factor)) %>%
mutate(across(remediation_1:remediation_4, as.factor)) %>%
mutate(across(air_cleaning_1:air_cleaning_4, as.factor)) %>%
mutate(group=as.factor(group)) %>%
mutate(ownership_status=recode_factor(ownership_status,
"1"="Owned",
"2"="Rented",
"3"="Owned but Not Living",
"4"="Purchased after Dec. 30, 2021",
"5"="Other")) %>%
mutate(air_quality_1=recode_factor(air_quality_1,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(air_quality_2=recode_factor(air_quality_2,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(air_quality_3=recode_factor(air_quality_3,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(air_quality_4=recode_factor(air_quality_4,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(home_smell_1week=recode_factor(home_smell_1week,
"0"="No",
"1"="Yes",
"2"="Not sure/don't remember")) %>%
mutate(home_smell_type=recode_factor(home_smell_type,
"1"="Like a campfire",
"2"="Like chemicals or a chemical fire",
"3"="Other")) %>%
mutate(group=recode_factor(group,
"A"="within fire perimeter",
"B"="within 1/2 mile",
"C"="1/2 to 1 mile",
"D"="1 to 2 miles")) %>%
mutate(remediation_1=recode_factor(remediation_1,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither"))%>%
mutate(remediation_2=recode_factor(remediation_2,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither"))%>%
mutate(remediation_3=recode_factor(remediation_3,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither"))%>%
mutate(remediation_4=recode_factor(remediation_4,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither"))%>%
mutate(air_cleaning=recode_factor(air_cleaning_1,
"1"="Changing air filters in HVAC")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_2,
"1"="Using air cleaners"))%>%
mutate(air_cleaning=recode_factor(air_cleaning_3,
"1"="Hiring someone to clean indoor air")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_4,
"1"="Other")) %>%
mutate(air_cleaning_3=recode_factor(air_cleaning_3,
"1"="Hiring someone to clean indoor air"))%>%
mutate(air_cleaning_4=recode_factor(air_cleaning_4,
"1"="Other")) %>%
mutate(education=recode_factor(education,
"0"="Did not specify",
"1"="High School Graduate",
"2"="GED or Equivalent",
"3"="Some College",
"4"="Associate's Degree",
"5"="Bachelor's Degree",
"6"="Master's Degree",
"7"="Doctoral Degree")) %>%
mutate(income=recode_factor(income,
"1"="Less than $20,000",
"2"="$20,000 to $34,999",
"3"="$35,000 to $49,999",
"4"="$50,000 to $79,999",
"5"="$80,000 to $99,999",
"6"="$100,000 to $149,999",
"7"="$150,000 to $199,999",
"8"="$200,000 or more",
"9"="Prefer not to answer")) %>%
filter(finished==1) %>% #only analyzing finished data, not in wave.two
filter(mailingcity=="BOULDER"|mailingcity=="SUPERIOR"|mailingcity=="LOUISVILLE") #filtering only respondents in Boulder Unincorporated, Superior or Louisville
wave.two<-wavetwo %>%
mutate(air_quality_2=as.factor(air_quality_2)) %>%
mutate(air_quality_4=as.factor(air_quality_4)) %>%
mutate(across(remediation_1:remediation_4, as.factor)) %>%
mutate(across(air_cleaning_1:air_cleaning_4, as.factor)) %>%
mutate(group=as.factor(group)) %>%
mutate(renterowner=recode_factor(renterowner,
"1"="Owned",
"2"="Rented")) %>%
mutate(air_quality_2=recode_factor(air_quality_2,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(air_quality_4=recode_factor(air_quality_4,
"1"="Strongly disagree",
"2"="Somewhat disagree",
"3"="Neither agree nor disagree",
"4"="Somewhat agree",
"5"="Strongly agree",
"6"="Don't know")) %>%
mutate(group=recode_factor(group,
"A"="within fire perimeter",
"B"="within 1/2 mile",
"C"="1/2 to 1 mile",
"D"="1 to 2 miles")) %>%
mutate(remediation_1=recode_factor(remediation_1,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither")) %>%
mutate(remediation_2=recode_factor(remediation_2,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither")) %>%
mutate(remediation_3=recode_factor(remediation_3,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither")) %>%
mutate(remediation_4=recode_factor(remediation_4,
"1"="Did this already",
"2"="Plan to do this",
"3"="Neither")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_1,
"1"="Changing air filters in HVAC")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_2,
"1"="Using air cleaners")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_3,
"1"="Hiring someone to clean indoor air")) %>%
mutate(air_cleaning=recode_factor(air_cleaning_4,
"1"="Other")) %>%
mutate(income=recode_factor(income,
"1"="Less than $20,000",
"2"="$20,000 to $34,999",
"3"="$35,000 to $49,999",
"4"="$50,000 to $79,999",
"5"="$80,000 to $99,999",
"6"="$100,000 to $149,999",
"7"="$150,000 to $199,999",
"8"="$200,000 or more",
"9"="Prefer not to answer")) %>%
mutate(education=recode_factor(education,
"0"="Did not specify",
"1"="High School Graduate",
"2"="GED or Equivalent",
"3"="Some College",
"4"="Associate's Degree",
"5"="Bachelor's Degree",
"6"="Master's Degree",
"7"="Doctoral Degree"))
Descriptive Survey Statistics (tables)
Number of respondents for Wave 1 and Wave 2
wave.one.only2=filter(wave.one,wave.one$surveyid %in% wave.two$surveyid) #survey respondants in wave two who are not in wave one?
wave.two.only1=filter(wave.two,wave.two$surveyid %in% wave.one.only2$surveyid) #survey respondants in wave two who are not in wave
# nrow(waveone)
# nrow(wavetwo)
Respondants = c(nrow(wave.one),nrow(wave.two), nrow(wave.one.only2))
Wave=c("One","Two","Both")
df1 = cbind(Wave,Respondants) %>%
as.data.frame()
knitr::kable(df1, caption="Total Respondents")
Total Respondents
|
Wave
|
Respondants
|
|
One
|
802
|
|
Two
|
576
|
|
Both
|
568
|
# save_kable(knitr::kable(df1),"../images/w1w2_counts.png")
We utilized survey results of people whom completed the entire survey
for accuracy. Wave one contained 3,462 total respondents with 802 of
them completing the whole survey. Wave two had a total of 576
respondents who finished the survey. There were 568 respondents whom
participated in both surveys, resulting in a 70.82% and 98.61% retention
rate for wave one and wave two respectively.
% male/female
#### Wave One
tblFun(wave.one$Gender3, "Wave One - Gender")
Wave One - Gender
|
|
Count
|
Percentage
|
|
Female
|
473
|
58.98
|
|
Male
|
298
|
37.16
|
|
Other or Declined
|
31
|
3.87
|
#### Wave Two
tblFun(wave.two$Gender3, "Wave Two - Gender")
Wave Two - Gender
|
|
Count
|
Percentage
|
|
Female
|
351
|
60.94
|
|
Male
|
201
|
34.90
|
|
Other or Declined
|
24
|
4.17
|
# table(wave.one.only2$Gender3, wave.two.only1$Gender3)
tblFun(wave.one.only2$Gender3, "Respondents in Both Waves - Gender")
Respondents in Both Waves - Gender
|
|
Count
|
Percentage
|
|
Female
|
350
|
61.62
|
|
Male
|
201
|
35.39
|
|
Other or Declined
|
17
|
2.99
|
chisq.test(wave.one.only2$Gender3, wave.two.only1$Gender3)
## Warning in chisq.test(wave.one.only2$Gender3, wave.two.only1$Gender3):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$Gender3 and wave.two.only1$Gender3
## X-squared = 1136, df = 4, p-value < 2.2e-16
Both waves had a total of 350 (61.62%) people select female and 201
(35.39%) people select male, with 17 (3.91%) people selecting other or
declining to state their gender. A majority of respondents were female,
with 58.98% of the respondents selecting female in wave one compared to
60.94% in wave two, seeing a slight increase in female. Similarly, there
was an increase of people in wave one (3.87%) than in wave two (4.17%)
whom selected other or declined to state their gender. 37.16% of
respondents selected male in wave one and 34.90% of them selected male
in wave two. The ^2 test revealed that gender is a dependent variable
and is statistically significant across both waves.
Educational attainment for each wave
#### Wave One
tblFun(wave.one$education, "Wave One - Education")
Wave One - Education
|
|
Count
|
Percentage
|
|
High School Graduate
|
13
|
1.65
|
|
GED or Equivalent
|
1
|
0.13
|
|
Some College
|
43
|
5.46
|
|
Associate’s Degree
|
21
|
2.66
|
|
Bachelor’s Degree
|
310
|
39.34
|
|
Master’s Degree
|
283
|
35.91
|
|
Doctoral Degree
|
117
|
14.85
|
#### Wave Two
tblFun(wave.two$education, "Wave Two - Education")
Wave Two - Education
|
|
Count
|
Percentage
|
|
High School Graduate
|
9
|
1.59
|
|
GED or Equivalent
|
1
|
0.18
|
|
Some College
|
28
|
4.96
|
|
Associate’s Degree
|
13
|
2.30
|
|
Bachelor’s Degree
|
213
|
37.70
|
|
Master’s Degree
|
216
|
38.23
|
|
Doctoral Degree
|
85
|
15.04
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$education, "Respondents in Both Waves - Education")
Respondents in Both Waves - Education
|
|
Count
|
Percentage
|
|
High School Graduate
|
9
|
1.60
|
|
GED or Equivalent
|
1
|
0.18
|
|
Some College
|
28
|
4.96
|
|
Associate’s Degree
|
13
|
2.30
|
|
Bachelor’s Degree
|
213
|
37.77
|
|
Master’s Degree
|
215
|
38.12
|
|
Doctoral Degree
|
85
|
15.07
|
chisq.test(wave.one.only2$education, wave.two.only1$education)
## Warning in chisq.test(wave.one.only2$education, wave.two.only1$education):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$education and wave.two.only1$education
## X-squared = 3384, df = 36, p-value < 2.2e-16
We categorized the education levels of each respondent into seven
categories: high school degree, GED or equivalent degree, some college
education, associate’s degree, bachelor’s degree, masters’s degree, and
doctoral degree. The most common degree for wave one was a bachelor’s
degree, with 39.34% of respondents having this level of education. For
wave two, the most common level of education was a master’s degree with
38.23%. The GED or equivalent category had the least amount of
respondents in general. This was the same for respondents who responded
to both waves, althought slightly less with 38.12% of respondents
attaining a master’s degree. The ^2 test revealed that education is a
dependent variable and is statistically significant across both
waves.
Income for each wave
#### Wave One
tblFun(wave.one$income, "Wave One - Income")
Wave One - Income
|
|
Count
|
Percentage
|
|
Less than $20,000
|
7
|
0.89
|
|
$20,000 to $34,999
|
11
|
1.40
|
|
$35,000 to $49,999
|
27
|
3.45
|
|
$50,000 to $79,999
|
50
|
6.39
|
|
$80,000 to $99,999
|
62
|
7.92
|
|
$100,000 to $149,999
|
148
|
18.90
|
|
$150,000 to $199,999
|
125
|
15.96
|
|
$200,000 or more
|
210
|
26.82
|
|
Prefer not to answer
|
143
|
18.26
|
#### Wave Two
tblFun(wave.two$income, "Wave Two - Income")
Wave Two - Income
|
|
Count
|
Percentage
|
|
Less than $20,000
|
5
|
0.89
|
|
$20,000 to $34,999
|
9
|
1.60
|
|
$35,000 to $49,999
|
19
|
3.37
|
|
$50,000 to $79,999
|
32
|
5.67
|
|
$80,000 to $99,999
|
45
|
7.98
|
|
$100,000 to $149,999
|
110
|
19.50
|
|
$150,000 to $199,999
|
98
|
17.38
|
|
$200,000 or more
|
150
|
26.60
|
|
Prefer not to answer
|
96
|
17.02
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$income, "Respondents in Both Waves - Income")
Respondents in Both Waves - Income
|
|
Count
|
Percentage
|
|
Less than $20,000
|
5
|
0.89
|
|
$20,000 to $34,999
|
9
|
1.60
|
|
$35,000 to $49,999
|
19
|
3.37
|
|
$50,000 to $79,999
|
32
|
5.68
|
|
$80,000 to $99,999
|
45
|
7.99
|
|
$100,000 to $149,999
|
110
|
19.54
|
|
$150,000 to $199,999
|
98
|
17.41
|
|
$200,000 or more
|
149
|
26.47
|
|
Prefer not to answer
|
96
|
17.05
|
chisq.test(wave.one.only2$income, wave.two.only1$income)
## Warning in chisq.test(wave.one.only2$income, wave.two.only1$income):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$income and wave.two.only1$income
## X-squared = 4504, df = 64, p-value < 2.2e-16
Income was categorized nine different bins: Less than $20,000,
$20,000 to $34,999, $35,000 to $49,999, $50,000 to $79,999, $80,000 to
$99,999, $100,000 to $149,999, $150,000 to $199,999, $200,000 or more,
and Prefer not to answer. The most income level for wave one was the
$200,000 or more range, with 39.34% of respondents making this much.
This was the same for wave two as well as for respondents who responded
to both waves, 26.60% and 26.32 respectively. The ^2 test also revealed
that income is a dependent variable and statistically significant across
both waves.
Race/ethnicity
#### Wave One
tblFun(wave.one$RaceEthn2, "Wave One - Race/Ethnicity")
Wave One - Race/Ethnicity
|
|
Count
|
Percentage
|
|
|
35
|
4.36
|
|
POC
|
76
|
9.48
|
|
White
|
691
|
86.16
|
#### Wave Two
tblFun(wave.two$RaceEthn2, "Wave Two - Race/Ethnicity")
Wave Two - Race/Ethnicity
|
|
Count
|
Percentage
|
|
|
23
|
3.99
|
|
POC
|
46
|
7.99
|
|
White
|
507
|
88.02
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$RaceEthn2, "Respondents in Both Waves - Race/Ethnicity")
Respondents in Both Waves - Race/Ethnicity
|
|
Count
|
Percentage
|
|
|
16
|
2.82
|
|
POC
|
46
|
8.10
|
|
White
|
506
|
89.08
|
chisq.test(wave.one.only2$RaceEthn2, wave.two.only1$RaceEthn2)
## Warning in chisq.test(wave.one.only2$RaceEthn2, wave.two.only1$RaceEthn2):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$RaceEthn2 and wave.two.only1$RaceEthn2
## X-squared = 1136, df = 4, p-value < 2.2e-16
For ease of comparison amongst the two groups, we only categorized
race and ethnicity into two different categories, white and person of
color (POC). A majority of people in wave one, wave two, and of
respondents who completed both waves were white, being 86.16%, 88.02%,
and 89.09 respectively. The ^2 test showed that income is a dependent
variable and is statistically significant across both waves.
Owner/Renter
#### Wave One
tblFun(wave.one$ownership_status, "Wave One - Ownership Status")
Wave One - Ownership Status
|
|
Count
|
Percentage
|
|
Owned
|
744
|
93.12
|
|
Rented
|
31
|
3.88
|
|
Owned but Not Living
|
10
|
1.25
|
|
Purchased after Dec. 30, 2021
|
4
|
0.50
|
|
Other
|
10
|
1.25
|
#### Wave Two
tblFun(wave.two$renterowner, "Wave Two - Ownership Status")
Wave Two - Ownership Status
|
|
Count
|
Percentage
|
|
Owned
|
554
|
96.18
|
|
Rented
|
22
|
3.82
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$ownership_status, "Respondents in Both Waves - Ownership Status")
Respondents in Both Waves - Ownership Status
|
|
Count
|
Percentage
|
|
Owned
|
534
|
94.01
|
|
Rented
|
22
|
3.87
|
|
Owned but Not Living
|
4
|
0.70
|
|
Purchased after Dec. 30, 2021
|
2
|
0.35
|
|
Other
|
6
|
1.06
|
chisq.test(wave.one.only2$ownership_status, wave.two.only1$renterowner)
## Warning in chisq.test(wave.one.only2$ownership_status,
## wave.two.only1$renterowner): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$ownership_status and wave.two.only1$renterowner
## X-squared = 568, df = 4, p-value < 2.2e-16
Wave one and wave two had different categories of ownership. Wave one
was categorized into Owned, Rented, Owned but Not Living, Purchased
after Dec. 30, 2021, and Other. Wave two was only categorized into Owned
and Rented. A majority of people in wave one, wave two, and of
respondents who completed both waves owned their homes during the
Marshal Fire, with 93.12%, 96.18%, and 94.01% respectively. The ^2 test
revealed that ownership status is a dependent variable and is
statistically significant across both waves
Impact category
#### Wave One
tblFun(filter(wave.one,wave.one$impact_cat!="")$impact_cat, "Wave One - Impact Category")
Wave One - Impact Category
|
|
Count
|
Percentage
|
|
Complete loss
|
204
|
25.66
|
|
Damaged, living there
|
355
|
44.65
|
|
Damaged, not living there
|
41
|
5.16
|
|
No damage, living there
|
191
|
24.03
|
|
No damage, not living there
|
4
|
0.50
|
#### Wave Two
tblFun(wave.two$impact_cat, "Wave Two - Impact Category")
Wave Two - Impact Category
|
|
Count
|
Percentage
|
|
Complete loss
|
163
|
28.30
|
|
Damaged, living there
|
249
|
43.23
|
|
Damaged, not living there
|
33
|
5.73
|
|
No damage, living there
|
128
|
22.22
|
|
No damage, not living there
|
3
|
0.52
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$impact_cat, "Respondents in Both - Impact Category")
Respondents in Both - Impact Category
|
|
Count
|
Percentage
|
|
Complete loss
|
161
|
28.35
|
|
Damaged, living there
|
245
|
43.13
|
|
Damaged, not living there
|
33
|
5.81
|
|
No damage, living there
|
126
|
22.18
|
|
No damage, not living there
|
3
|
0.53
|
chisq.test(wave.one.only2$impact_cat, wave.two.only1$impact_cat)
## Warning in chisq.test(wave.one.only2$impact_cat, wave.two.only1$impact_cat):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$impact_cat and wave.two.only1$impact_cat
## X-squared = 2272, df = 16, p-value < 2.2e-16
Each participant recorded the amount of damage to their property and
whether or not they lived at the current address. These were combined to
create the impact category, which was seperated into 5 levels; Complete
loss; Damaged, living there; Damaged, not living there; No damage,
living there; No damage, not living there. Most respondents in wave one,
wave two, and of respondents who completed both waves were living at
thier current address while sustaining damage to the property from the
Marshal Fire, with 44.65%, 43.23%, and 43.13% respectively. The ^2 test
revealed that these impact categories are both dependent variable and
statistically significant across both waves.
Distance category
#### Wave One
tblFun(wave.one$group, "Wave One - Distance Category")
Wave One - Distance Category
|
|
Count
|
Percentage
|
|
within fire perimeter
|
465
|
57.98
|
|
within 1/2 mile
|
197
|
24.56
|
|
1/2 to 1 mile
|
74
|
9.23
|
|
1 to 2 miles
|
66
|
8.23
|
|
|
0
|
0.00
|
#### Wave Two
tblFun(wave.two$group, "Wave Two - Distance Category")
Wave Two - Distance Category
|
|
Count
|
Percentage
|
|
within fire perimeter
|
343
|
59.55
|
|
within 1/2 mile
|
139
|
24.13
|
|
1/2 to 1 mile
|
56
|
9.72
|
|
1 to 2 miles
|
38
|
6.60
|
# table(wave.one.only2$education, wave.two.only1$education)
tblFun(wave.one.only2$group, "Respondents in Both Waves - Distance Category")
Respondents in Both Waves - Distance Category
|
|
Count
|
Percentage
|
|
within fire perimeter
|
337
|
59.33
|
|
within 1/2 mile
|
139
|
24.47
|
|
1/2 to 1 mile
|
54
|
9.51
|
|
1 to 2 miles
|
38
|
6.69
|
|
|
0
|
0.00
|
chisq.test(wave.one.only2$group, wave.two.only1$group)
## Warning in chisq.test(wave.one.only2$group, wave.two.only1$group): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: wave.one.only2$group and wave.two.only1$group
## X-squared = 1704, df = 9, p-value < 2.2e-16
The proximity of each participant’s property to the Marshall fire was
split into four categories, within the fire perimeter within a 1/2 mile
of the fire, within 1/2 to 1 mile of the fire, and within 1 to 2 miles
of the fire. Most respondents in wave one, wave two, and of respondents
who completed both waves were within the fire perimeter with 57.96%,
59.55%, and 59.33% respectively. The ^2 test revealed that income is
also a dependent variable and is statistically significant across both
waves.
Physical Health Symptoms
Wave One:
Wave One counts of symptoms bar plot or pie chart
wave1.symptoms=grep("^symptoms_\\d", names(wave.one), value=T)
rnames=c("Dry Cough","Wet Cough","Wheeze","Itchy or watery eyes",
"Sore Throat","Headache","Shortness of Breath",
"Difficult or labored breathing","Sneezing or stuffy nose",
"Nausea or vomiting ","Allergic skin reaction",
"Strange taste in your mouth","None of these")
count=list()
for(i in wave1.symptoms) {
v=sum(!is.na(wave.one[,i]))
count=c(count,v)
}
df_sym=cbind(wave1.symptoms,count,rnames) %>%
as.data.frame() %>%
dplyr::select(rnames,count)
df_sym$count=as.integer(df_sym$count)
colnames(df_sym)=c("Symptom","Count")
knitr::kable(df_sym)
|
Symptom
|
Count
|
|
Dry Cough
|
185
|
|
Wet Cough
|
14
|
|
Wheeze
|
43
|
|
Itchy or watery eyes
|
225
|
|
Sore Throat
|
170
|
|
Headache
|
203
|
|
Shortness of Breath
|
58
|
|
Difficult or labored breathing
|
45
|
|
Sneezing or stuffy nose
|
171
|
|
Nausea or vomiting
|
22
|
|
Allergic skin reaction
|
33
|
|
Strange taste in your mouth
|
75
|
|
None of these
|
387
|
# final_df = df %>%
# mutate(Percent=round(Count/nrow(wave.one)*100,2))
b=barplot(df_sym$Count, names.arg=df_sym$Symptom, las=2,
main="Count of Symptoms")
text(b, df_sym$Count-10, df_sym$Count, font=2)

Wave One table of symptom counts by distance category
count.dist=v=wave.one %>%
filter(!is.na(.[,wave1.symptoms[1]])) %>%
group_by(group) %>%
count(.[,wave1.symptoms[1]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[1]) %>%
dplyr::select(group,symptom,n)
for(i in 2:length(wave1.symptoms)) {
v=wave.one %>%
filter(!is.na(.[,wave1.symptoms[i]])) %>%
group_by(group) %>%
count(.[,wave1.symptoms[i]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[i]) %>%
dplyr::select(group,symptom,n)
count.dist=rbind(count.dist,v)
}
colnames(count.dist)=c("Distance","Symptom", "Count")
ggplot(data=count.dist, aes(x=Symptom,y=Count,fill=Distance)) +
geom_bar(stat="identity") +
labs(title="Symptoms")

ggplot(data=count.dist, aes(x=Distance,y=Count,fill=Symptom)) +
geom_bar(stat="identity") +
labs(title="Symptoms")

Wave One table of symptom counts by impact category
count.impact= filter(wave.one,impact_cat!="") %>%
filter(!is.na(.[,wave1.symptoms[1]])) %>%
group_by(impact_cat) %>%
count(.[,wave1.symptoms[1]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[1]) %>%
dplyr::select(impact_cat,symptom,n)
for(i in 2:length(wave1.symptoms)) {
v=filter(wave.one,impact_cat!="") %>%
filter(!is.na(.[,wave1.symptoms[i]])) %>%
group_by(impact_cat) %>%
count(.[,wave1.symptoms[i]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[i]) %>%
dplyr::select(impact_cat,symptom,n)
count.impact=rbind(count.impact,v)
}
colnames(count.impact)=c("Impact_Category","Symptom", "Count")
ggplot(data=count.impact, aes(x=Symptom,y=Count,fill=Impact_Category)) +
geom_bar(stat="identity") +
labs(title="Symptoms - Impact Category",
subtitle = "I can change the colors of these graphs")

Wave Two:
Wave Two counts of symptoms bar plot or pie chart
wave2.symptoms=grep("^symptoms_\\d", names(wave.two), value=T)[-13:-14] # has 2 more columns
count2=list()
for(i in wave2.symptoms) {
v=sum(wave.two[,i]==1, na.rm=T)
count2=c(count2,v)
}
df_sym2=cbind(wave2.symptoms,count2,rnames) %>%
as.data.frame() %>%
dplyr::select(rnames,count2)
df_sym2$count2=as.integer(df_sym2$count2)
colnames(df_sym2)=c("Symptom","Count")
knitr::kable(df_sym2)
|
Symptom
|
Count
|
|
Dry Cough
|
88
|
|
Wet Cough
|
12
|
|
Wheeze
|
14
|
|
Itchy or watery eyes
|
93
|
|
Sore Throat
|
53
|
|
Headache
|
64
|
|
Shortness of Breath
|
22
|
|
Difficult or labored breathing
|
4
|
|
Sneezing or stuffy nose
|
81
|
|
Nausea or vomiting
|
8
|
|
Allergic skin reaction
|
18
|
|
Strange taste in your mouth
|
14
|
|
None of these
|
389
|
# final_df = df %>%
# mutate(Percent=round(Count/nrow(wave.one)*100,2))
b=barplot(df_sym2$Count, names.arg=df_sym2$Symptom, las=2,
main="Count of Symptoms")
text(b, df_sym2$Count+15, df_sym2$Count, font=2)

Wave Two table of symptom counts by distance category
count2.dist=v=wave.two %>%
filter(!is.na(.[,wave2.symptoms[1]])) %>%
group_by(group) %>%
count(.[,wave2.symptoms[1]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[1]) %>%
dplyr::select(group,symptom,n)
for(i in 2:length(wave1.symptoms)) {
v=wave.two %>%
filter(!is.na(.[,wave2.symptoms[i]])) %>%
group_by(group) %>%
count(.[,wave2.symptoms[i]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[i]) %>%
dplyr::select(group,symptom,n)
count2.dist=rbind(count2.dist,v)
}
colnames(count2.dist)=c("Distance","Symptom", "Count")
ggplot(data=count.dist, aes(x=Symptom,y=Count,fill=Distance)) +
geom_bar(stat="identity") +
labs(title="Wave 2 Symptoms")

ggplot(data=count2.dist, aes(x=Distance,y=Count,fill=Symptom)) +
geom_bar(stat="identity") +
labs(title="Wave 2 Symptoms")

Wave Two table of symptom counts by impact category
count2.impact= filter(wave.two,impact_cat!="") %>%
filter(!is.na(.[,wave2.symptoms[1]])) %>%
filter(.[,wave2.symptoms[1]]==1) %>%
group_by(impact_cat) %>%
count(.[,wave2.symptoms[1]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[1]) %>%
dplyr::select(impact_cat,symptom,n)
for(i in 2:length(wave2.symptoms)) {
v=filter(wave.two,impact_cat!="") %>%
filter(!is.na(.[,wave2.symptoms[i]])) %>%
filter(.[,wave2.symptoms[i]]==1) %>%
group_by(impact_cat) %>%
count(.[,wave2.symptoms[i]]) %>%
as.data.frame() %>%
mutate(symptom=rnames[i]) %>%
dplyr::select(impact_cat,symptom,n)
count2.impact=rbind(count2.impact,v)
}
colnames(count2.impact)=c("Impact_Category","Symptom", "Count")
ggplot(data=count2.impact, aes(x=Symptom,y=Count,fill=Impact_Category)) +
geom_bar(stat="identity") +
labs(title="Symptoms - Impact Category",
subtitle = "I can change the colors of these graphs")

Wave One to Wave Two comparison – physical symptoms (maybe select to
only be the people who responded to both waves)
compare = left_join(df_sym,df_sym2, by="Symptom")
colnames(compare)=c("Symptom","Wave_One","Wave_Two")
df = melt(compare, id=c("Symptom")) %>%
dplyr::mutate(Symptom=rep(1:13,2),
Symptom=recode_factor(Symptom,
"1"=rnames[1],
"2"=rnames[2],
"3"=rnames[3],
"4"=rnames[4],
"5"=rnames[5],
"6"=rnames[6],
"7"=rnames[7],
"8"=rnames[8],
"9"=rnames[9],
"10"=rnames[10],
"11"=rnames[11],
"12"=rnames[12],
"13"=rnames[13]))
ggplot(df) +
geom_bar(aes(x = Symptom, y = value, fill = variable),
stat="identity", position = "dodge", width = 0.7) +
scale_fill_manual("",
values = c("red","blue"),
labels = c(" Wave One", " Wave Two")) +
labs(title="Wave One / Wave Two Comparison",
subtitle="Add counts to top of bars",
x="Symptom",
y="Count") +
theme(plot.title = element_text(hjust=0.5),
plot.subtitle = element_text(hjust=0.5),
plot.caption = element_text(hjust=0.5),
axis.text.x = element_text(angle = 45, hjust = 1))

Comparing Respondants that replied to both surveys
rnames=c("Dry Cough","Wet Cough","Wheeze","Itchy or watery eyes",
"Sore Throat","Headache","Shortness of Breath",
"Difficult or labored breathing","Sneezing or stuffy nose",
"Nausea or vomiting ","Allergic skin reaction",
"Strange taste in your mouth","None of these")
count3=list()
for(i in wave1.symptoms) {
v=sum(!is.na(wave.one.only2[,i]))
count3=c(count3,v)
}
count4=list()
for(i in wave2.symptoms) {
v=sum(wave.two.only1[,i]==1, na.rm = T)
count4=c(count4,v)
}
df_sym3=cbind(wave1.symptoms,count3,rnames) %>%
as.data.frame() %>%
dplyr::select(rnames,count3)
df_sym3$count3=as.integer(df_sym3$count3)
colnames(df_sym3)=c("Symptom","Count")
df_sym4=cbind(wave2.symptoms,count4,rnames) %>%
as.data.frame() %>%
dplyr::select(rnames,count4)
df_sym4$count4=as.integer(df_sym4$count4)
colnames(df_sym4)=c("Symptom","Count")
# final_df = df %>%
# mutate(Percent=round(Count/nrow(wave.one)*100,2))
compare = left_join(df_sym3,df_sym4, by="Symptom")
colnames(compare)=c("Symptom","Wave_One","Wave_Two")
df = melt(compare, id=c("Symptom")) %>%
dplyr::mutate(Symptom=rep(1:13,2),
Symptom=recode_factor(Symptom,
"1"=rnames[1],
"2"=rnames[2],
"3"=rnames[3],
"4"=rnames[4],
"5"=rnames[5],
"6"=rnames[6],
"7"=rnames[7],
"8"=rnames[8],
"9"=rnames[9],
"10"=rnames[10],
"11"=rnames[11],
"12"=rnames[12],
"13"=rnames[13]))
ggplot(df) +
geom_bar(aes(x = Symptom, y = value, fill = variable),
stat="identity", position = "dodge", width = 0.7) +
scale_fill_manual("",
values = c("red","blue"),
labels = c(" Wave One", " Wave Two")) +
labs(title="Wave One / Wave Two Comparison: \n Only Respondants in Both Waves",
subtitle="Add counts to top of bars",
x="Symptom",
y="Count") +
theme(plot.title = element_text(hjust=0.5),
plot.subtitle = element_text(hjust=0.5),
plot.caption = element_text(hjust=0.5),
axis.text.x = element_text(angle = 45, hjust = 1))

Predictors of Physical Health Symptoms
# Linear Regression
# Chi Squared
# G Squared
# ANOVA
# Generalized Additive Models (GAMs)
Discussion: FOR LATER